<<<<<<< HEAD
library(ChromCom)
library(ggplot2)
library(gridExtra)
library(reshape2)
=======
library(mylib)
library(ChromCom)
library(ggplot2)
library(gridExtra)
library(reshape2)
library(parallel)
binDir <- "../RData"
>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
11/07/2017
Writing introduction. First script to do simulation.
12/07/2017
Let’s try if it works. An example for \(t_1 = -60\) min, \(\Delta t = 0\), \(k_1 = 0.04\ {\rm min}^{-1}\) and \(k_2 = 0.04\ {\rm min}^{-1}\).
<<<<<<< HEAD
pars <- list(
t1 = -60,
dt = 0,
r1 = 0.04,
r2 = 0.04
=======
pars <- c3pars(
t1 = -60,
dt2 = 0,
k1 = 0.04,
k2 = 0.04
>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
)
chr <- ChromCom3(pars)
chr <- generateCells(chr, nsim=1000)
plotTimelines(chr)
<<<<<<< HEAD

=======

>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
Just to check if output is as expected. With \(k_2=0\) we should have pure exponential decay. The yellow curve is \(e^{-k_1 t}\).
<<<<<<< HEAD
pars <- list(
t1 = -60,
dt = 0,
r1 = 0.04,
r2 = 0
=======
pars <- c3pars(
t1 = -60,
dt2 = 0,
k1 = 0.04,
k2 = 0
>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
)
tchr <- ChromCom3(pars)
tchr <- generateCells(tchr, nsim=1000)
x <- seq(from=pars$t1, to=100, by=1)
<<<<<<< HEAD
y <- exp(-pars$r1 * (x - pars$t1))
=======
y <- exp(-pars$k1 * (x - pars$t1))
>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
g <- plotTimelines(tchr)
g + geom_line(data=data.frame(x=x, y=y), aes(x,y), colour="yellow")
<<<<<<< HEAD

=======

>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
Close, but not perfect. Here is an example with much smaller time step (0.1) and larger number of cells (10,000).
<<<<<<< HEAD
pars <- list(
t1 = -60,
dt = 0,
r1 = 0.04,
r2 = 0
=======
pars <- c3pars(
t1 = -60,
dt2 = 0,
k1 = 0.04,
k2 = 0
>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
)
tchr <- ChromCom3(pars, time = seq(from=-140, to=90, by=0.1))
tchr <- generateCells(tchr, nsim=10000)
x <- seq(from=pars$t1, to=100, by=1)
<<<<<<< HEAD
y <- exp(-pars$r1 * (x - pars$t1))
=======
y <- exp(-pars$k1 * (x - pars$t1))
>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
g <- plotTimelines(tchr)
g + geom_line(data=data.frame(x=x, y=y), aes(x,y), colour="yellow")
<<<<<<< HEAD

=======

>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
Parameter grid
parameterGrid <- function(pars, par1, range1, par2, range2) {
melts <- NULL
for(p1 in range1) {
pars[[par1]] <- p1
for(p2 in range2) {
pars[[par2]] <- p2
chr <- ChromCom3(pars)
chr <- generateCells(chr)
label1 <- sprintf("%s = %.3g", par1, p1)
label2 <- sprintf("%s = %.3g", par2, p2)
m <- meltTimelines(chr, label1=label1, label2=label2)
melts <- rbind(melts, m)
}
}
timelinePanel(melts)
}
pars <- c3pars(
t1 = -30,
dt2 = 0,
k1 = 0.05,
k2 = 0.01
)
range1 <- c(0.01, 0.05, 0.08, 0.12)
range2 <- c(0.01, 0.05, 0.08, 0.12)
parameterGrid(pars, "k1", range1, "k2", range2)
<<<<<<< HEAD

=======

>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
I think we need to introduce a delay between pink and red.
pars <- c3pars(
t1 = -30,
dt2 = 10,
k1 = 0.05,
k2 = 0.01
)
range1 <- c(0.01, 0.03, 0.05, 0.10)
range2 <- c(0.01, 0.05, 0.08, 0.12)
parameterGrid(pars, "k1", range1, "k2", range2)
<<<<<<< HEAD

=======

>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
What about range of delays?
pars <- c3pars(
t1 = -30,
dt2 = 0,
k1 = 0.03,
k2 = 0.01
)
range1 <- c(0, 10, 20, 30)
range2 <- c(0.01, 0.05, 0.08, 0.12)
parameterGrid(pars, "dt", range1, "k2", range2)
<<<<<<< HEAD

=======

>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
Experimental data.
echr <- experimentalData(dataFile$scramble)
plotTimelines(echr, smooth=TRUE, k=15)
<<<<<<< HEAD

=======

>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
13/07/2017
Prepared shiny app for deployment.
24/07/2017
Tomo suggested that the model parameters can be derived directly from data. We can write the following equations
\(\dot{B} = -k_1 B\)
\(\dot{P} = k_1 B - k_2 R\)
\(\dot{R} = k_2 P\)
\(B + P + R = 1\)
<<<<<<< HEAD
However, I’m not sure if this is exacly the model I use in the simulation. In the simulation, we assumed that P->R transition can happen only after the BB->P transition.
=======
However, I’m not sure if this is exactly the model I use in the simulation. In the simulation, we assumed that P->R transition can happen only after the B->P transition.
>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
Because proportions and their derivatives are observed, we can find the rates:
\(k_1 = - \frac{\dot{B}}{B}\)
\(k_2 = \frac{\dot{R}}{P}\)
This will require a lot of smoothing, otherwise the derivatives will be all over the place.
First, I see if I can recover rates from the generated data.
<<<<<<< HEAD
=======
timeDeriv <- function(chr, k=20) {
ts <- list()
for(col in chr$colours) {
x <- chr$cnt[[col]] / chr$cnt$total
x <- caTools::runmean(x, k)
x <- ts(x, start=chr$timepars$start, deltat=chr$timepars$step)
xdot <- diff(x)
ts[[col]] <- x
ts[[paste0(col, ".diff")]] <- xdot
}
ts
}
plotRates <- function(chr, k=20) {
ts <- timeDeriv(chr, k=k)
k1 <- -ts$B.diff / ts$B
k2 <- ts$R.diff / ts$P
k1[which(is.nan(k1))] <- NA
k2[which(is.nan(k2))] <- NA
df <- data.frame(t=time(k1), k1=k1, k2=k2)
m <- melt(df, measure.vars = c("k1", "k2"))
m$value <- as.numeric(m$value)
m$value[which(m$value > 0.5)] <- 0.5
m$value[which(m$value < -0.5)] <- -0.5
ggplot(m, aes(x=t, y=value)) + geom_line(aes(colour=variable))
}
Start with a model with 10,000 cells and 0.1 min time step. In this model \(k_1 = 0.04\) and \(k_2 = 0\). Top panels show original data, lower panels - smoothed data.
grid.arrange(plotTimelines(tchr), plotRates(tchr, k=1), ncol=2)

grid.arrange(plotTimelines(tchr, smooth=TRUE, k=200), plotRates(tchr, k=200), ncol=2)

What happens if I use only 100 cells and 1 min time step in the model? This time \(k_2 = 0.04\).
chr <- generateCells(chr, nsim=100)
grid.arrange(plotTimelines(chr, k=1), plotRates(chr, k=1), ncol=2)

grid.arrange(plotTimelines(chr, smooth=TRUE, k=20), plotRates(chr, k=20), ncol=2)

And now, for real data. Here are scramble data smoothed with running mean with window size of 10 time points.
grid.arrange(plotTimelines(echr, smooth=TRUE, k=10), plotRates(echr, k=10), ncol=2)

Now, smoothing with 80 points.
grid.arrange(plotTimelines(echr, smooth=TRUE, k=80), plotRates(echr, k=80), ncol=2)

25/07/2017
Had a chat with Tomo and John yesterday. They wanted to add a score showing how far the model is from the experimental data. I did it by calculating \(\chi^2\) summed over all three curves. It is now added to the figure in the shiny app.
The next thing is to add transition P->B, with rate \(k_3\). It can happen only after time \(\Delta t_3\).
I added a simulation method (step-by-step). Now testing it.
pars <- c3pars(
t1 = -30,
k1 = 0.04,
k2 = 0.04,
k3 = 0.04,
dt2 = 10,
dt3 = 50
)
chr <- ChromCom3(pars)
chr <- generateCells(chr, nsim=1000, method="simulation")
plotTimelines(chr)

26/07/2017
What about fitting model to the data? “nlm” doesn’t work because model is stochastic. Method “SANN” from optim (that is stochastic simulated annealing) took very long time and went into a false minimum (negative \(k_2\)), as parameters cannot be constrained.
Finally I used “L-BFGS-B” method which allows box constraints. Alas, this method is not very good at finding the real minimum, often ending up in a local one. I’m guessing this is because of my stochastic model. Therefore, I need to run it several times and search for the best minimum. Using “mclapply” to speed it up.
I also changed the error score. \(\chi^2\) doesn’t work very well when expected counts are zero. And I cannot simply exclude them, because zero is an important number. Instead I calculate a simple RMS.
pars <- c3pars()
chr <- cacheData("fit_scramble_n3000_3par", fitChr, echr, pars, npar=3, nsim=3000, ntry=16, binDir=binDir)
plotTimelines(chr, expdata = echr, withpars=TRUE)

Now, the same, but with extra parameter, \(\Delta t_2\).
pars <- c3pars(dt2=5)
chr <- cacheData("fit_scramble_n3000_4par", fitChr, echr, pars, npar=4, nsim=3000, ntry=16, binDir=binDir)
plotTimelines(chr, expdata = echr, withpars=TRUE)

And now the other two data sets.
echr2 <- experimentalData(dataFile$NCAPD2)
pars <- c3pars()
chr2 <- cacheData("fit_NCAPD2_n3000_3par", fitChr, echr2, pars, npar=3, nsim=3000, ntry=16, ncores=8, binDir=binDir)
plotTimelines(chr2, expdata = echr2, withpars=TRUE)
pars <- c3pars(dt2=10)
chr2 <- cacheData("fit_NCAPD2_n3000_4par", fitChr, echr2, pars, npar=4, nsim=3000, ntry=16, ncores=8, binDir=binDir)
plotTimelines(chr2, expdata = echr2, withpars=TRUE)
echr3 <- experimentalData(dataFile$NCAPD3)
pars <- c3pars()
chr3 <- cacheData("fit_NCAPD3_n3000_3par", fitChr, echr3, pars, npar=3, nsim=3000, ntry=16, ncores=8, binDir=binDir)
plotTimelines(chr3, expdata = echr3, withpars=TRUE)

pars <- c3pars(dt2=10)
chr3 <- cacheData("fit_NCAPD3_n3000_4par", fitChr, echr3, pars, npar=4, nsim=3000, ntry=16, ncores=8, binDir=binDir)
plotTimelines(chr3, expdata = echr3, withpars=TRUE)

>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba
timeDeriv <- function(chr, k=20) {
ts <- list()
for(col in chr$colours) {
x <- chr$cnt[[col]] / chr$cnt$total
x <- caTools::runmean(x, k)
x <- ts(x, start=chr$timepars$start, deltat=chr$timepars$step)
xdot <- diff(x)
ts[[col]] <- x
ts[[paste0(col, ".diff")]] <- xdot
}
ts
}
plotRates <- function(chr, k=20) {
ts <- timeDeriv(chr, k=k)
k1 <- -ts$BB.diff / ts$BB
k2 <- ts$R.diff / ts$P
k1[which(is.nan(k1))] <- NA
k2[which(is.nan(k2))] <- NA
df <- data.frame(t=time(k1), k1=k1, k2=k2)
m <- melt(df, measure.vars = c("k1", "k2"))
m$value <- as.numeric(m$value)
m$value[which(m$value > 0.5)] <- 0.5
m$value[which(m$value < -0.5)] <- -0.5
ggplot(m, aes(x=t, y=value)) + geom_line(aes(colour=variable))
}
Start with a model wiht 10,000 cells and 0.1 min time step. In this model \(k_1 = 0.04\) and \(k_2 = 0\). Top panels show original data, lower panels - smoothed data.
grid.arrange(plotTimelines(tchr), plotRates(tchr, k=1), ncol=2)

grid.arrange(plotTimelines(tchr, smooth=TRUE, k=200), plotRates(tchr, k=200), ncol=2)

What happens if I use only 100 cells and 1 min time step in the model? This time \(k_2 = 0.04\).
chr <- generateCells(chr, nsim=100)
grid.arrange(plotTimelines(chr, k=1), plotRates(chr, k=1), ncol=2)

grid.arrange(plotTimelines(chr, smooth=TRUE, k=20), plotRates(chr, k=20), ncol=2)

And now, for real data. Here are scramble data smoothed with running mean with window size of 10 time points.
grid.arrange(plotTimelines(echr, smooth=TRUE, k=10), plotRates(echr, k=10), ncol=2)

Now, smoothing with 80 points.
grid.arrange(plotTimelines(echr, smooth=TRUE, k=80), plotRates(echr, k=80), ncol=2)

<<<<<<< HEAD
LS0tCnRpdGxlOiAiQ2hyb21vc29tZSBjb21wYWN0aW9uIC0gc2ltcGxlIHRocmVlLXN0YXRlIG1vZGVsIgpvdXRwdXQ6IAogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IGZhbHNlCiAgICB0b2NfZmxvYXQ6IGZhbHNlCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUKLS0tCgpgYGB7cn0KbGlicmFyeShDaHJvbUNvbSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGdyaWRFeHRyYSkKbGlicmFyeShyZXNoYXBlMikKYGBgCgojIyAxMS8wNy8yMDE3CgpXcml0aW5nIGludHJvZHVjdGlvbi4gRmlyc3Qgc2NyaXB0IHRvIGRvIHNpbXVsYXRpb24uCgoKIyMgMTIvMDcvMjAxNwoKTGV0J3MgdHJ5IGlmIGl0IHdvcmtzLiBBbiBleGFtcGxlIGZvciAkdF8xID0gLTYwJCBtaW4sICRcRGVsdGEgdCA9IDAkLCAka18xID0gMC4wNFwge1xybSBtaW59XnstMX0kIGFuZCAka18yID0gMC4wNFwge1xybSBtaW59XnstMX0kLgoKYGBge3IsIGZpZy53aWR0aD01LCBmaWcuaGVpZ2h0PTN9CnBhcnMgPC0gbGlzdCgKICB0MSA9IC02MCwKICBkdCA9IDAsCiAgcjEgPSAwLjA0LAogIHIyID0gMC4wNAopCmNociA8LSBDaHJvbUNvbTMocGFycykKY2hyIDwtIGdlbmVyYXRlQ2VsbHMoY2hyLCBuc2ltPTEwMDApCnBsb3RUaW1lbGluZXMoY2hyKQpgYGAKCkp1c3QgdG8gY2hlY2sgaWYgb3V0cHV0IGlzIGFzIGV4cGVjdGVkLiBXaXRoICRrXzI9MCQgd2Ugc2hvdWxkIGhhdmUgcHVyZSBleHBvbmVudGlhbCBkZWNheS4gVGhlIHllbGxvdyBjdXJ2ZSBpcyAkZV57LWtfMSB0fSQuCgpgYGB7ciwgZmlnLndpZHRoPTV9CnBhcnMgPC0gbGlzdCgKICB0MSA9IC02MCwKICBkdCA9IDAsCiAgcjEgPSAwLjA0LAogIHIyID0gMAopCnRjaHIgPC0gQ2hyb21Db20zKHBhcnMpCnRjaHIgPC0gZ2VuZXJhdGVDZWxscyh0Y2hyLCBuc2ltPTEwMDApCgp4IDwtIHNlcShmcm9tPXBhcnMkdDEsIHRvPTEwMCwgYnk9MSkKeSA8LSBleHAoLXBhcnMkcjEgKiAoeCAtIHBhcnMkdDEpKQpnIDwtIHBsb3RUaW1lbGluZXModGNocikKZyArIGdlb21fbGluZShkYXRhPWRhdGEuZnJhbWUoeD14LCB5PXkpLCBhZXMoeCx5KSwgY29sb3VyPSJ5ZWxsb3ciKQpgYGAKCkNsb3NlLCBidXQgbm90IHBlcmZlY3QuIEhlcmUgaXMgYW4gZXhhbXBsZSB3aXRoIG11Y2ggc21hbGxlciB0aW1lIHN0ZXAgKDAuMSkgYW5kIGxhcmdlciBudW1iZXIgb2YgY2VsbHMgKDEwLDAwMCkuCgpgYGB7ciwgZmlnLndpZHRoPTV9CnBhcnMgPC0gbGlzdCgKICB0MSA9IC02MCwKICBkdCA9IDAsCiAgcjEgPSAwLjA0LAogIHIyID0gMAopCnRjaHIgPC0gQ2hyb21Db20zKHBhcnMsIHRpbWUgPSBzZXEoZnJvbT0tMTQwLCB0bz05MCwgYnk9MC4xKSkKdGNociA8LSBnZW5lcmF0ZUNlbGxzKHRjaHIsIG5zaW09MTAwMDApCgp4IDwtIHNlcShmcm9tPXBhcnMkdDEsIHRvPTEwMCwgYnk9MSkKeSA8LSBleHAoLXBhcnMkcjEgKiAoeCAtIHBhcnMkdDEpKQpnIDwtIHBsb3RUaW1lbGluZXModGNocikKZyArIGdlb21fbGluZShkYXRhPWRhdGEuZnJhbWUoeD14LCB5PXkpLCBhZXMoeCx5KSwgY29sb3VyPSJ5ZWxsb3ciKQpgYGAKCgoKIyMjIFBhcmFtZXRlciBncmlkCgpgYGB7cn0KcGFyYW1ldGVyR3JpZCA8LSBmdW5jdGlvbihwYXJzLCBwYXIxLCByYW5nZTEsIHBhcjIsIHJhbmdlMikgewogIG1lbHRzIDwtIE5VTEwKICBmb3IocDEgaW4gcmFuZ2UxKSB7CiAgICBwYXJzW1twYXIxXV0gPC0gcDEKICAgIGZvcihwMiBpbiByYW5nZTIpIHsKICAgICAgcGFyc1tbcGFyMl1dIDwtIHAyCiAgICAgIGNociA8LSBDaHJvbUNvbTMocGFycykKICAgICAgY2hyIDwtIGdlbmVyYXRlQ2VsbHMoY2hyKQogICAgICBsYWJlbDEgPC0gc3ByaW50ZigiJXMgPSAlLjNnIiwgcGFyMSwgcDEpCiAgICAgIGxhYmVsMiA8LSBzcHJpbnRmKCIlcyA9ICUuM2ciLCBwYXIyLCBwMikKICAgICAgbSA8LSBtZWx0VGltZWxpbmVzKGNociwgbGFiZWwxPWxhYmVsMSwgbGFiZWwyPWxhYmVsMikKICAgICAgbWVsdHMgPC0gcmJpbmQobWVsdHMsIG0pCiAgICB9CiAgfQogIHRpbWVsaW5lUGFuZWwobWVsdHMpCn0KYGBgCgpgYGB7cn0KcGFycyA8LSBsaXN0KAogIHQxID0gLTMwLAogIGR0ID0gMCwKICByMSA9IDAuMDUsCiAgcjIgPSAwLjAxCikKCnJhbmdlMSA8LSBjKDAuMDEsIDAuMDUsIDAuMDgsIDAuMTIpCnJhbmdlMiA8LSBjKDAuMDEsIDAuMDUsIDAuMDgsIDAuMTIpCnBhcmFtZXRlckdyaWQocGFycywgInIxIiwgcmFuZ2UxLCAicjIiLCByYW5nZTIpCmBgYAogSSB0aGluayB3ZSBuZWVkIHRvIGludHJvZHVjZSBhIGRlbGF5IGJldHdlZW4gcGluayBhbmQgcmVkLgogCiAKYGBge3J9CnBhcnMgPC0gbGlzdCgKICB0MSA9IC0zMCwKICBkdCA9IDEwLAogIHIxID0gMC4wNSwKICByMiA9IDAuMDEKKQoKcmFuZ2UxIDwtIGMoMC4wMSwgMC4wMywgMC4wNSwgMC4xMCkKcmFuZ2UyIDwtIGMoMC4wMSwgMC4wNSwgMC4wOCwgMC4xMikKcGFyYW1ldGVyR3JpZChwYXJzLCAicjEiLCByYW5nZTEsICJyMiIsIHJhbmdlMikKYGBgCgpXaGF0IGFib3V0IHJhbmdlIG9mIGRlbGF5cz8KCmBgYHtyfQpwYXJzIDwtIGxpc3QoCiAgdDEgPSAtMzAsCiAgZHQgPSAwLAogIHIxID0gMC4wMywKICByMiA9IDAuMDEKKQoKcmFuZ2UxIDwtIGMoMCwgMTAsIDIwLCAzMCkKcmFuZ2UyIDwtIGMoMC4wMSwgMC4wNSwgMC4wOCwgMC4xMikKcGFyYW1ldGVyR3JpZChwYXJzLCAiZHQiLCByYW5nZTEsICJyMiIsIHJhbmdlMikKYGBgCgpFeHBlcmltZW50YWwgZGF0YS4KCmBgYHtyLCB3YXJuaW5nPUZBTFNFLCBmaWcud2lkdGg9NX0KZWNociA8LSBleHBlcmltZW50YWxEYXRhKGRhdGFGaWxlJHNjcmFtYmxlKQpwbG90VGltZWxpbmVzKGVjaHIsIHNtb290aD1UUlVFLCBrPTE1KQpgYGAKCgojIyAxMy8wNy8yMDE3CgpQcmVwYXJlZCBbc2hpbnkgYXBwXShodHRwczovL3NoaW55LmNvbXBiaW8uZHVuZGVlLmFjLnVrL21hcmVrX2Nocm9tY29tL3BhcmFtX3R1bmVyLykgZm9yIGRlcGxveW1lbnQuCgojIyAyNC8wNy8yMDE3CgpUb21vIHN1Z2dlc3RlZCB0aGF0IHRoZSBtb2RlbCBwYXJhbWV0ZXJzIGNhbiBiZSBkZXJpdmVkIGRpcmVjdGx5IGZyb20gZGF0YS4gV2UgY2FuIHdyaXRlIHRoZSBmb2xsb3dpbmcgZXF1YXRpb25zCgokXGRvdHtCfSA9IC1rXzEgQiQKCiRcZG90e1B9ID0ga18xIEIgLSBrXzIgUiQKCiRcZG90e1J9ID0ga18yIFAkCgokQiArIFAgKyBSID0gMSQKCkhvd2V2ZXIsIEknbSBub3Qgc3VyZSBpZiB0aGlzIGlzIGV4YWNseSB0aGUgbW9kZWwgSSB1c2UgaW4gdGhlIHNpbXVsYXRpb24uIEluIHRoZSBzaW11bGF0aW9uLCB3ZSBhc3N1bWVkIHRoYXQgUC0+UiB0cmFuc2l0aW9uIGNhbiBoYXBwZW4gb25seSBhZnRlciB0aGUgQkItPlAgdHJhbnNpdGlvbi4gCgpCZWNhdXNlIHByb3BvcnRpb25zIGFuZCB0aGVpciBkZXJpdmF0aXZlcyBhcmUgb2JzZXJ2ZWQsIHdlIGNhbiBmaW5kIHRoZSByYXRlczoKCiRrXzEgPSAtIFxmcmFje1xkb3R7Qn19e0J9JAoKJGtfMiA9IFxmcmFje1xkb3R7Un19e1B9JAoKVGhpcyB3aWxsIHJlcXVpcmUgYSBsb3Qgb2Ygc21vb3RoaW5nLCBvdGhlcndpc2UgdGhlIGRlcml2YXRpdmVzIHdpbGwgYmUgYWxsIG92ZXIgdGhlIHBsYWNlLgoKRmlyc3QsIEkgc2VlIGlmIEkgY2FuIHJlY292ZXIgcmF0ZXMgZnJvbSB0aGUgZ2VuZXJhdGVkIGRhdGEuCgpgYGB7cn0KdGltZURlcml2IDwtIGZ1bmN0aW9uKGNociwgaz0yMCkgewogIHRzIDwtIGxpc3QoKQogIGZvcihjb2wgaW4gY2hyJGNvbG91cnMpIHsKICAgIHggPC0gY2hyJGNudFtbY29sXV0gLyBjaHIkY250JHRvdGFsCiAgICB4IDwtIGNhVG9vbHM6OnJ1bm1lYW4oeCwgaykKICAgIHggPC0gdHMoeCwgc3RhcnQ9Y2hyJHRpbWVwYXJzJHN0YXJ0LCBkZWx0YXQ9Y2hyJHRpbWVwYXJzJHN0ZXApCiAgICB4ZG90IDwtIGRpZmYoeCkKICAgIHRzW1tjb2xdXSA8LSB4CiAgICB0c1tbcGFzdGUwKGNvbCwgIi5kaWZmIildXSA8LSB4ZG90CiAgfQogIHRzCn0KYGBgCgpgYGB7cn0KcGxvdFJhdGVzIDwtIGZ1bmN0aW9uKGNociwgaz0yMCkgewogIHRzIDwtIHRpbWVEZXJpdihjaHIsIGs9aykKICBrMSA8LSAtdHMkQkIuZGlmZiAvIHRzJEJCCiAgazIgPC0gdHMkUi5kaWZmIC8gdHMkUAogIGsxW3doaWNoKGlzLm5hbihrMSkpXSA8LSBOQQogIGsyW3doaWNoKGlzLm5hbihrMikpXSA8LSBOQQogIGRmIDwtIGRhdGEuZnJhbWUodD10aW1lKGsxKSwgazE9azEsIGsyPWsyKQogIG0gPC0gbWVsdChkZiwgbWVhc3VyZS52YXJzID0gYygiazEiLCAiazIiKSkKICBtJHZhbHVlIDwtIGFzLm51bWVyaWMobSR2YWx1ZSkKICBtJHZhbHVlW3doaWNoKG0kdmFsdWUgPiAwLjUpXSA8LSAwLjUKICBtJHZhbHVlW3doaWNoKG0kdmFsdWUgPCAtMC41KV0gPC0gLTAuNQogIGdncGxvdChtLCBhZXMoeD10LCB5PXZhbHVlKSkgKyBnZW9tX2xpbmUoYWVzKGNvbG91cj12YXJpYWJsZSkpIAp9CmBgYAoKU3RhcnQgd2l0aCBhIG1vZGVsIHdpaHQgMTAsMDAwIGNlbGxzIGFuZCAwLjEgbWluIHRpbWUgc3RlcC4gSW4gdGhpcyBtb2RlbCAka18xID0gMC4wNCQgYW5kICRrXzIgPSAwJC4gIFRvcCBwYW5lbHMgc2hvdyBvcmlnaW5hbCBkYXRhLCBsb3dlciBwYW5lbHMgLSBzbW9vdGhlZCBkYXRhLgoKYGBge3IsIGZpZy53aWR0aD04LCBmaWcuaGVpZ2h0PTN9CmdyaWQuYXJyYW5nZShwbG90VGltZWxpbmVzKHRjaHIpLCBwbG90UmF0ZXModGNociwgaz0xKSwgbmNvbD0yKQpncmlkLmFycmFuZ2UocGxvdFRpbWVsaW5lcyh0Y2hyLCBzbW9vdGg9VFJVRSwgaz0yMDApLCBwbG90UmF0ZXModGNociwgaz0yMDApLCBuY29sPTIpCmBgYAoKCldoYXQgaGFwcGVucyBpZiBJIHVzZSBvbmx5IDEwMCBjZWxscyBhbmQgMSBtaW4gdGltZSBzdGVwIGluIHRoZSBtb2RlbD8gVGhpcyB0aW1lICRrXzIgPSAwLjA0JC4KCmBgYHtyLCBmaWcud2lkdGg9OSwgZmlnLmhlaWdodD0zfQpjaHIgPC0gZ2VuZXJhdGVDZWxscyhjaHIsIG5zaW09MTAwKQpncmlkLmFycmFuZ2UocGxvdFRpbWVsaW5lcyhjaHIsIGs9MSksIHBsb3RSYXRlcyhjaHIsIGs9MSksIG5jb2w9MikKZ3JpZC5hcnJhbmdlKHBsb3RUaW1lbGluZXMoY2hyLCBzbW9vdGg9VFJVRSwgaz0yMCksIHBsb3RSYXRlcyhjaHIsIGs9MjApLCBuY29sPTIpCmBgYAoKCkFuZCBub3csIGZvciByZWFsIGRhdGEuIEhlcmUgYXJlIHNjcmFtYmxlIGRhdGEgc21vb3RoZWQgd2l0aCBydW5uaW5nIG1lYW4gd2l0aCB3aW5kb3cgc2l6ZSBvZiAxMCB0aW1lIHBvaW50cy4KCmBgYHtyLCBmaWcud2lkdGg9OSwgZmlnLmhlaWdodD0zfQpncmlkLmFycmFuZ2UocGxvdFRpbWVsaW5lcyhlY2hyLCBzbW9vdGg9VFJVRSwgaz0xMCksIHBsb3RSYXRlcyhlY2hyLCBrPTEwKSwgbmNvbD0yKQpgYGAKCk5vdywgc21vb3RoaW5nIHdpdGggODAgcG9pbnRzLgoKYGBge3IsIGZpZy53aWR0aD05LCBmaWcuaGVpZ2h0PTN9CmdyaWQuYXJyYW5nZShwbG90VGltZWxpbmVzKGVjaHIsIHNtb290aD1UUlVFLCBrPTgwKSwgcGxvdFJhdGVzKGVjaHIsIGs9ODApLCBuY29sPTIpCmBgYAo=
=======
---
title: "Chromosome compaction - simple three-state model"
output: 
  html_notebook:
    toc: false
    toc_float: false
    code_folding: hide
---

```{r}
library(mylib)
library(ChromCom)
library(ggplot2)
library(gridExtra)
library(reshape2)
library(parallel)

binDir <- "../RData"
```

## 11/07/2017

Writing introduction. First script to do simulation.


## 12/07/2017

Let's try if it works. An example for $t_1 = -60$ min, $\Delta t = 0$, $k_1 = 0.04\ {\rm min}^{-1}$ and $k_2 = 0.04\ {\rm min}^{-1}$.

```{r, fig.width=5, fig.height=3}
pars <- c3pars(
  t1 = -60,
  dt2 = 0,
  k1 = 0.04,
  k2 = 0.04
)
chr <- ChromCom3(pars)
chr <- generateCells(chr, nsim=1000)
plotTimelines(chr)
```

Just to check if output is as expected. With $k_2=0$ we should have pure exponential decay. The yellow curve is $e^{-k_1 t}$.

```{r, fig.width=5}
pars <- c3pars(
  t1 = -60,
  dt2 = 0,
  k1 = 0.04,
  k2 = 0
)
tchr <- ChromCom3(pars)
tchr <- generateCells(tchr, nsim=1000)

x <- seq(from=pars$t1, to=100, by=1)
y <- exp(-pars$k1 * (x - pars$t1))
g <- plotTimelines(tchr)
g + geom_line(data=data.frame(x=x, y=y), aes(x,y), colour="yellow")
```

Close, but not perfect. Here is an example with much smaller time step (0.1) and larger number of cells (10,000).

```{r, fig.width=5}
pars <- c3pars(
  t1 = -60,
  dt2 = 0,
  k1 = 0.04,
  k2 = 0
)
tchr <- ChromCom3(pars, time = seq(from=-140, to=90, by=0.1))
tchr <- generateCells(tchr, nsim=10000)

x <- seq(from=pars$t1, to=100, by=1)
y <- exp(-pars$k1 * (x - pars$t1))
g <- plotTimelines(tchr)
g + geom_line(data=data.frame(x=x, y=y), aes(x,y), colour="yellow")
```



### Parameter grid

```{r}
parameterGrid <- function(pars, par1, range1, par2, range2) {
  melts <- NULL
  for(p1 in range1) {
    pars[[par1]] <- p1
    for(p2 in range2) {
      pars[[par2]] <- p2
      chr <- ChromCom3(pars)
      chr <- generateCells(chr)
      label1 <- sprintf("%s = %.3g", par1, p1)
      label2 <- sprintf("%s = %.3g", par2, p2)
      m <- meltTimelines(chr, label1=label1, label2=label2)
      melts <- rbind(melts, m)
    }
  }
  timelinePanel(melts)
}
```

```{r}
pars <- c3pars(
  t1 = -30,
  dt2 = 0,
  k1 = 0.05,
  k2 = 0.01
)

range1 <- c(0.01, 0.05, 0.08, 0.12)
range2 <- c(0.01, 0.05, 0.08, 0.12)
parameterGrid(pars, "k1", range1, "k2", range2)
```
 I think we need to introduce a delay between pink and red.
 
 
```{r}
pars <- c3pars(
  t1 = -30,
  dt2 = 10,
  k1 = 0.05,
  k2 = 0.01
)

range1 <- c(0.01, 0.03, 0.05, 0.10)
range2 <- c(0.01, 0.05, 0.08, 0.12)
parameterGrid(pars, "k1", range1, "k2", range2)
```

What about range of delays?

```{r}
pars <- c3pars(
  t1 = -30,
  dt2 = 0,
  k1 = 0.03,
  k2 = 0.01
)

range1 <- c(0, 10, 20, 30)
range2 <- c(0.01, 0.05, 0.08, 0.12)
parameterGrid(pars, "dt", range1, "k2", range2)
```

Experimental data.

```{r, warning=FALSE, fig.width=5}
echr <- experimentalData(dataFile$scramble)
plotTimelines(echr, smooth=TRUE, k=15)
```


## 13/07/2017

Prepared [shiny app](https://shiny.compbio.dundee.ac.uk/marek_chromcom/param_tuner/) for deployment.

## 24/07/2017

Tomo suggested that the model parameters can be derived directly from data. We can write the following equations

$\dot{B} = -k_1 B$

$\dot{P} = k_1 B - k_2 R$

$\dot{R} = k_2 P$

$B + P + R = 1$

However, I'm not sure if this is exactly the model I use in the simulation. In the simulation, we assumed that P->R transition can happen only after the B->P transition. 

Because proportions and their derivatives are observed, we can find the rates:

$k_1 = - \frac{\dot{B}}{B}$

$k_2 = \frac{\dot{R}}{P}$

This will require a lot of smoothing, otherwise the derivatives will be all over the place.

First, I see if I can recover rates from the generated data.

```{r}
timeDeriv <- function(chr, k=20) {
  ts <- list()
  for(col in chr$colours) {
    x <- chr$cnt[[col]] / chr$cnt$total
    x <- caTools::runmean(x, k)
    x <- ts(x, start=chr$timepars$start, deltat=chr$timepars$step)
    xdot <- diff(x)
    ts[[col]] <- x
    ts[[paste0(col, ".diff")]] <- xdot
  }
  ts
}
```

```{r}
plotRates <- function(chr, k=20) {
  ts <- timeDeriv(chr, k=k)
  k1 <- -ts$B.diff / ts$B
  k2 <- ts$R.diff / ts$P
  k1[which(is.nan(k1))] <- NA
  k2[which(is.nan(k2))] <- NA
  df <- data.frame(t=time(k1), k1=k1, k2=k2)
  m <- melt(df, measure.vars = c("k1", "k2"))
  m$value <- as.numeric(m$value)
  m$value[which(m$value > 0.5)] <- 0.5
  m$value[which(m$value < -0.5)] <- -0.5
  ggplot(m, aes(x=t, y=value)) + geom_line(aes(colour=variable)) 
}
```

Start with a model with 10,000 cells and 0.1 min time step. In this model $k_1 = 0.04$ and $k_2 = 0$.  Top panels show original data, lower panels - smoothed data.

```{r, fig.width=8, fig.height=3}
grid.arrange(plotTimelines(tchr), plotRates(tchr, k=1), ncol=2)
grid.arrange(plotTimelines(tchr, smooth=TRUE, k=200), plotRates(tchr, k=200), ncol=2)
```


What happens if I use only 100 cells and 1 min time step in the model? This time $k_2 = 0.04$.

```{r, fig.width=8, fig.height=3}
chr <- generateCells(chr, nsim=100)
grid.arrange(plotTimelines(chr, k=1), plotRates(chr, k=1), ncol=2)
grid.arrange(plotTimelines(chr, smooth=TRUE, k=20), plotRates(chr, k=20), ncol=2)
```


And now, for real data. Here are scramble data smoothed with running mean with window size of 10 time points.

```{r, fig.width=8, fig.height=3}
grid.arrange(plotTimelines(echr, smooth=TRUE, k=10), plotRates(echr, k=10), ncol=2)
```

Now, smoothing with 80 points.

```{r, fig.width=8, fig.height=3}
grid.arrange(plotTimelines(echr, smooth=TRUE, k=80), plotRates(echr, k=80), ncol=2)
```

## 25/07/2017

Had a chat with Tomo and John yesterday. They wanted to add a score showing how far the model is from the experimental data. I did it by calculating $\chi^2$ summed over all three curves. It is now added to the figure in the shiny app.

The next thing is to add transition P->B, with rate $k_3$. It can happen only after time $\Delta t_3$.

I added a simulation method (step-by-step). Now testing it.

```{r, fig.width=5, fig.height=3}
pars <- c3pars(
  t1 = -30,
  k1 = 0.04,
  k2 = 0.04,
  k3 = 0.04,
  dt2 = 10,
  dt3 = 50
)
chr <- ChromCom3(pars)
chr <- generateCells(chr, nsim=1000, method="simulation")
plotTimelines(chr)
```

## 26/07/2017

What about fitting model to the data? "nlm" doesn't work because model is stochastic. Method "SANN" from optim (that is stochastic simulated annealing) took very long time and went into a false minimum (negative $k_2$), as parameters cannot be constrained.

Finally I used "L-BFGS-B" method which allows box constraints. Alas, this method is not very good at finding the real minimum, often ending up in a local one. I'm guessing this is because of my stochastic model. Therefore, I need to run it several times and search for the best minimum. Using "mclapply" to speed it up.

I also changed the error score. $\chi^2$ doesn't work very well when expected counts are zero. And I cannot simply exclude them, because zero is an important number. Instead I calculate a simple RMS.

```{r, fig.width=5}
pars <- c3pars()
chr <- cacheData("fit_scramble_n3000_3par", fitChr, echr, pars, npar=3, nsim=3000, ntry=16, binDir=binDir)
plotTimelines(chr, expdata = echr, withpars=TRUE)
```

Now, the same, but with extra parameter, $\Delta t_2$.

```{r, fig.width=5}
pars <- c3pars(dt2=5)
chr <- cacheData("fit_scramble_n3000_4par", fitChr, echr, pars, npar=4, nsim=3000, ntry=16, binDir=binDir)
plotTimelines(chr, expdata = echr, withpars=TRUE)
```

And now the other two data sets.

```{r, fig.width=5}
echr2 <- experimentalData(dataFile$NCAPD2)
pars <- c3pars()
chr2 <- cacheData("fit_NCAPD2_n3000_3par", fitChr, echr2, pars, npar=3, nsim=3000, ntry=16, ncores=8, binDir=binDir)
plotTimelines(chr2, expdata = echr2, withpars=TRUE)

pars <- c3pars(dt2=10)
chr2 <- cacheData("fit_NCAPD2_n3000_4par", fitChr, echr2, pars, npar=4, nsim=3000, ntry=16, ncores=8, binDir=binDir)
plotTimelines(chr2, expdata = echr2, withpars=TRUE)
```


```{r, fig.width=5}
echr3 <- experimentalData(dataFile$NCAPD3)
pars <- c3pars()
chr3 <- cacheData("fit_NCAPD3_n3000_3par", fitChr, echr3, pars, npar=3, nsim=3000, ntry=16, ncores=8, binDir=binDir)
plotTimelines(chr3, expdata = echr3, withpars=TRUE)

pars <- c3pars(dt2=10)
chr3 <- cacheData("fit_NCAPD3_n3000_4par", fitChr, echr3, pars, npar=4, nsim=3000, ntry=16, ncores=8, binDir=binDir)
plotTimelines(chr3, expdata = echr3, withpars=TRUE)
```


>>>>>>> 01b0e8d10be96d37a253e2c8c213bf6e968042ba